The objective of this analysis is to model and forecast electricity pricing time series data in the Dallas, TX area. Our client is the Dallas local government, and they have requested the forecasts for economic and infrastructure planning. The electricity pricing data is from the St. Louis Federal Reserve. We chose to model the data from January 2000 to present. Data from earlier dates had different behavior because it was before Texas deregulated electricity pricing. This analysis explores the use of univariate and multivariate time series models. The multivariate models include the addition of the variables Dallas temperature, Dallas gasoline prices, and southern urban consumer price index (CPI). These additional variables were obtained from the National Weather Service, the St. Louis Federal Reserve, and the Bureau of Labor Statistics. Univariate methods include various ARIMA models and signal plus noise. Multivariate methods include VAR, neural network (MLP), and ensemble models.
library(tswge)      # Woodward/Gray/Elliott time series toolkit (plotts, est.ar, fore.arima, ...)
library(dplyr)
library(tidyverse)
library(readxl)     # read_excel for the CPI and temperature workbooks
library(vars)       # VAR / VARselect
library(nnfor)      # mlp neural-network forecasting
# Set this to data folder for your file
# NOTE(review): hardcoded absolute user paths make the script non-portable;
# consider a relative path or here::here() instead of setwd().
#setwd("/Users/riccoferraro/Documents/SMU/TimeSeries/TimeSeriesFinalProject/data")
setwd("/Users/mingyang/Desktop/SMU/TimeSeries/TimeSeriesFinalProject/data")
# Read in DFWA electricity data; both columns kept as character
# (AVG_EP is converted to numeric later, after subsetting).
dfwa.electricity <- read.csv(
  "AVG_ELEC_DFWA_TX.csv",
  col.names = c("DATE", "AVG_EP"),
  colClasses = c(DATE = "character", AVG_EP = "character")
)
# Read in CPI data for the Southern Urban area (header rows skipped), then
# keep only monthly rows: drop the half-year rows (Period contains "S")
# and the annual summary rows (Period "M13").
cpi <- read_excel("SouthernUrbanCPI.xlsx", sheet = "BLS Data Series", skip = 11) %>%
  filter(!grepl("S", Period), !grepl("M13", Period))
# Read in gas price data from the same area
gas.price <- read.csv("Dallas_FWA_GAS.csv")
# Read in temperature data and reshape from wide (one Mon_* column per
# month) to long (one row per month).
temp <- read_excel("DallasAreaTemp.xlsx", sheet = "Sheet1") %>%
  tidyr::pivot_longer(
    cols = starts_with("Mon_"),
    names_to = "Month",
    values_to = "Temperature"
  )
# Keep the first 386 monthly rows so the series lines up with the others
temp <- temp[1:386, ]
# Subset each series so they all start at the same month (Jan 1990).
# NOTE(review): the series are combined by row position below, which assumes
# all four sources start Jan 1990 after subsetting and contain exactly 386
# monthly rows -- confirm against the raw files.
# which(dfwa.electricity$DATE=="1990-01-01") # 135
dfwa.electricity = dfwa.electricity[135:520,]
rownames(dfwa.electricity) <- 1:nrow(dfwa.electricity)
# which(cpi$Year==1990 & cpi$Period=="M01")
cpi = cpi[91:476,]
rownames(cpi) <- 1:nrow(cpi)
# which(gas.price$DATE=="1990-01-01") # 145
gas.price = gas.price[145:530,]
rownames(gas.price) <- 1:nrow(gas.price)
#### Creating ultimate data frame under variable 'df' ####
# Columns are bound by position; valid only if the subsets above are
# date-aligned. AVG_EP was read as character, so convert to numeric here.
df = dfwa.electricity
df$CPI = cpi$Value
df$GAS_P = gas.price$APUS37A7471A
df$AVG_EP = as.numeric(df$AVG_EP)
df$TEMP = temp$Temperature
#### Due to distribution market deregulation in 1995, team decided to cut the realization
#### prior to 2000
# which(df$DATE=="2000-01-01") # 121
# Keep Jan 2000 onward: 266 monthly observations
df = df[121:386,]
rownames(df) <- 1:nrow(df)
#### Exploratory data analysis: realizations of all four series ####
plotts.wge(df$AVG_EP)
plotts.wge(df$CPI)
plotts.wge(df$GAS_P)
plotts.wge(df$TEMP)
# The sample autocorrelations damp slowly, which supports differencing the data
fig1 = plotts.sample.wge(df$AVG_EP)
# Overfit a high-order AR model; near-unit roots in the factor table
# suggest nonstationary factors to remove.
est = est.ar.wge(df$AVG_EP, p=16, type='burg')
##
##
## Coefficients of AR polynomial:
## 1.1098 -0.0493 -0.1180 0.0863 -0.1701 0.1127 -0.0761 0.0744 0.0928 -0.2121 0.1797 0.3475 -0.4070 -0.0942 0.1370 -0.0341
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9605B 1.0411 0.9605 0.0000
## 1-0.0146B+0.9208B^2 0.0079+-1.0421i 0.9596 0.2488
## 1+0.9509B+0.9131B^2 -0.5207+-0.9078i 0.9555 0.3329
## 1-1.6443B+0.9007B^2 0.9128+-0.5264i 0.9490 0.0833
## 1-0.9206B+0.8507B^2 0.5411+-0.9395i 0.9224 0.1668
## 1-0.9208B 1.0861 0.9208 0.0000
## 1+1.4952B+0.7387B^2 -1.0120+-0.5740i 0.8595 0.4179
## 1+0.8128B -1.2303 0.8128 0.5000
## 1+0.7275B -1.3745 0.7275 0.5000
## 1-0.6354B+0.1371B^2 2.3176+-1.3870i 0.3702 0.0858
##
##
# Compare the overfit factor table to the factors of a pure s = 12
# seasonal operator (1 - B^12).
factor.wge(phi = c(rep(0,11),1))
##
##
## Coefficients of AR polynomial:
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.0000B+1.0000B^2 0.5000+-0.8660i 1.0000 0.1667
## 1-1.0000B 1.0000 1.0000 0.0000
## 1-1.7321B+1.0000B^2 0.8660+-0.5000i 1.0000 0.0833
## 1+1.0000B+1.0000B^2 -0.5000+-0.8660i 1.0000 0.3333
## 1-0.0000B+1.0000B^2 0.0000+-1.0000i 1.0000 0.2500
## 1+1.7321B+1.0000B^2 -0.8660+-0.5000i 1.0000 0.4167
## 1+1.0000B -1.0000 1.0000 0.5000
##
##
# In the overfit model, the 1-B factor has the largest absolute reciprocal root,
# which by itself supports differencing the data.
# There is also some evidence of 1-B^12 seasonality, although some of the seasonal
# factors, such as 1+B at system frequency 0.5 and 1+1.73B+1B^2 at system frequency
# 0.4167, are not as close to the unit circle. s = 12 may still be worth exploring.
# First difference: apply the (1 - B) transform
d1 = artrans.wge(df$AVG_EP, phi.tr = 1)
# The differenced data still shows some seasonal behavior at lag 12
# Partial seasonal transform (1 - 0.4B^12) rather than a full seasonal
# difference (coefficient 0.4 instead of 1).
d1.12 = artrans.wge(d1, phi.tr = c(rep(0,11),0.4))
par(mfrow=c(1,1))
acf(d1.12) # ACF looks about white noise
ljung.wge(d1.12, K=24) # K=24 reject white noise hypothesis
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 39.35395
##
## $df
## [1] 24
##
## $pval
## [1] 0.02506178
ljung.wge(d1.12, K=48) # K = 48 reject white noise
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457 -0.009325486 -0.03049484 -0.01441944 0.03846121 -0.1092629 0.0477006 0.01390417 0.01907413 0.00746239 -0.02442159 0.01661073 0.1696413 0.03805096 0.0511036 0.03743145 -0.07413577 -0.1043787 -0.1778826 -0.08752393 -0.1099639 9.902894e-05 -0.04875424 0.09696807 0.1329072
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 84.83926
##
## $df
## [1] 48
##
## $pval
## [1] 0.0008252289
# The transformed series is not white noise, so it requires further modeling.
# est1.12 = aic5.wge(d1.12, p=0:15, q=0:5) # AIC picked p = 7, q=4
# est.1.12.bic = aic5.wge(d1.12, p=0:15, q=0:5, type = 'bic') # bic picked p=0, q=0
# est.1.12.aicc = aic5.wge(d1.12, p=0:15, q=0:5, type = 'aicc') # aicc picked p=1, q=0
# AIC leans towards a fancy model while BIC leans towards white noise.
# Attempt something in the middle by using AR(1) first.
# Candidate 1: AR(1) fit to the differenced / seasonally transformed series
params.est = est.arma.wge(d1.12, p=1)
##
##
## Coefficients of AR polynomial:
## 0.1182
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.1182B 8.4627 0.1182 0.0000
##
##
acf(params.est$res) # residuals look about white noise
ljung.wge(params.est$res, K=24) # K=24 reject white noise hypothesis
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 38.64312
##
## $df
## [1] 24
##
## $pval
## [1] 0.02975776
ljung.wge(params.est$res, K=48) # K = 48 reject null hypothesis of white noise
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151 -0.02076998 -0.02871357 -0.01578537 0.05426695 -0.1226772 0.06055979 0.006250035 0.01706408 0.008359714 -0.02802056 -0.0004448668 0.1679758 0.01269146 0.04367132 0.04119715 -0.06838042 -0.0771094 -0.1602163 -0.05547658 -0.102604 0.01950958 -0.06197392 0.08988801 0.1271899
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 79.38565
##
## $df
## [1] 48
##
## $pval
## [1] 0.002934686
# Candidate 2: the fancier ARMA(7,4) model identified by AIC
# NOTE: params.est is reassigned here; later forecasting code uses this fit.
params.est = est.arma.wge(d1.12, p=7,q=4)
##
##
## Coefficients of AR polynomial:
## -0.8600 -0.1354 -0.9232 -0.6079 0.1714 -0.0515 -0.0085
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8898B+0.9402B^2 -1.0050+-0.2314i 0.9696 0.4640
## 1-0.7621B+0.8873B^2 0.4294+-0.9709i 0.9420 0.1837
## 1-0.3782B+0.0918B^2 2.0591+-2.5786i 0.3030 0.1428
## 1+0.1104B -9.0565 0.1104 0.5000
##
##
##
##
## Coefficients of MA polynomial:
## -1.0151 -0.3327 -1.0074 -0.7941
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.7997B+0.9310B^2 0.4295+-0.9432i 0.9649 0.1820
## 1+1.8148B+0.8530B^2 -1.0638+-0.2018i 0.9236 0.4702
##
##
acf(params.est$res) # residuals look about white noise
ljung.wge(params.est$res, K=24) # K=24 fail to reject white noise hypothesis
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 16.1709
##
## $df
## [1] 24
##
## $pval
## [1] 0.8818087
ljung.wge(params.est$res, K=48) # K = 48 fail to reject null hypothesis of white noise
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917 0.01760575 -0.08784062 0.02203463 0.0346197 -0.103104 0.06085884 0.01483718 -0.008431453 0.01655047 -0.02007022 -0.02552383 0.2078729 -0.04307097 0.07704926 0.004884021 -0.02369241 -0.1080703 -0.107706 -0.09422072 -0.07229019 0.003069584 -0.0453473 0.07907943 0.134459
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 58.63666
##
## $df
## [1] 48
##
## $pval
## [1] 0.1397722
# All models are wrong, some are useful - we will proceed with the fancier model for now
model1.param = mult.wge(fac1 = params.est$phi, fac2 = c(rep(0,11),0.4))
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = T, lastn = T)
ASE.short = mean((df$AVG_EP[264:266]-pred.short$f)^2)
ASE.short # 3.184093e-05 -> 0.0000318 #New 5.988891e-05 -> 0.0000599
## [1] 5.988891e-05
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 36, limits = T, lastn = T)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-pred.long$f)^2)
ASE.long # 0.0001725023
## [1] 0.0001725023
# rolling.res.short = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 3 )
# rolling.res.short # RMSE = 0.004, ASE = 1.6e-0.5 -> 0.000016, nums of windows = 239
# rolling.res.long = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 36)
# rolling.res.long # RMSE = 0.014, ASE= 0.000196, num of windows = 206
pred.all.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = T, lastn = F)
pred.all.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 36, limits = T, lastn = F)
# Signal-plus-noise approach: regress price on time, then test whether the
# slope survives after accounting for serially correlated errors
# (Cochrane-Orcutt-style transformation).
x <- df$AVG_EP
n <- length(x)
t <- seq_len(n)
d <- lm(x ~ t)
# x.z holds the residuals around the fitted straight line
x.z <- unname(resid(d))
fig2 <- plotts.sample.wge(x.z)
# Fit an AR model to the residuals to capture the serial correlation
ar.z <- aic.wge(x.z, p = 0:15)
# Filter both the response and the time index through the fitted AR
# operator, then re-fit the line; the slope t-test is now approximately valid.
x.trans <- artrans.wge(x, phi.tr = ar.z$phi)
t.trans <- artrans.wge(t, phi.tr = ar.z$phi)
fit <- lm(x.trans ~ t.trans)
summary(fit)
##
## Call:
## lm(formula = x.trans ~ t.trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.014364 -0.002770 -0.000122 0.002314 0.016179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.724e-03 6.588e-04 13.243 < 2e-16 ***
## t.trans 1.957e-04 4.296e-05 4.554 8.22e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004589 on 250 degrees of freedom
## Multiple R-squared: 0.07661, Adjusted R-squared: 0.07291
## F-statistic: 20.74 on 1 and 250 DF, p-value: 8.22e-06
# After accounting for the serial correlation (AR(14)), there is strong evidence
# that the slope is significantly different from zero (p-value < 0.0001).
# So we will take the trend out and proceed.
# Cross-check with the Woodward-Bottone-Gray bootstrap test for trend.
wbg.boot.wge(x, sn=103)
## $p
## [1] 2
##
## $phi
## [1] 1.1384970 -0.1616007
##
## $pv
## [1] 0.1077694
# The bootstrap-based trend test failed to reject (p-value = 0.1078); however we
# will still try out how using the trend helps.
# Fit AR(14) to the detrended residuals for the signal-plus-noise model.
ar.est = est.ar.wge(x = x.z, p=14, type = 'burg')
##
##
## Coefficients of AR polynomial:
## 1.0829 -0.0764 -0.0694 0.0897 -0.1963 0.1276 -0.0738 0.0629 0.1052 -0.2335 0.1907 0.3402 -0.3996 -0.0021
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6619B+0.9161B^2 0.9071+-0.5185i 0.9571 0.0826
## 1-1.9020B+0.9086B^2 1.0467+-0.0712i 0.9532 0.0108
## 1-0.0219B+0.9024B^2 0.0121+-1.0526i 0.9500 0.2482
## 1+0.9609B+0.8985B^2 -0.5348+-0.9094i 0.9479 0.3346
## 1-0.9484B+0.8455B^2 0.5608+-0.9318i 0.9195 0.1638
## 1+0.9010B -1.1099 0.9010 0.5000
## 1+1.5840B+0.7807B^2 -1.0145+-0.5017i 0.8836 0.4269
## 1+0.0053B -189.3618 0.0053 0.5000
##
##
par(mfrow=c(1,1))
acf(ar.est$res)
ljtest.3 = ljung.wge(ar.est$res)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181
ljtest.4 = ljung.wge(ar.est$res, K=48)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181 -0.02561514 -0.02072488 -0.02436513 0.07604687 -0.1057342 0.1075884 0.06789439 0.01064667 0.03701851 -0.04101051 -0.02696131 0.1481545 -0.02702826 0.07297504 -0.015737 -0.03237783 -0.06675938 -0.1181091 -0.0329735 -0.1023404 -0.008890106 -0.05093476 0.05829844 0.1212425
# both tests reject white noise
# NOTE(review): the p-values were not captured above -- re-run ljtest.3/ljtest.4
# to confirm the rejection claim.
# Model 2: signal-plus-noise (linear trend + AR noise) forecasts.
# lastn = TRUE evaluates on the last n.ahead observations, so the short-term
# ASE covers obs 264:266 and the long-term ASE covers obs 231:266.
m2.result = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 3, lastn = TRUE, limits=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6637B+0.9197B^2 0.9044+-0.5189i 0.9590 0.0829
## 1-1.9090B+0.9150B^2 1.0432+-0.0685i 0.9566 0.0104
## 1-0.0231B+0.9079B^2 0.0127+-1.0494i 0.9528 0.2481
## 1+0.9692B+0.9021B^2 -0.5372+-0.9055i 0.9498 0.3352
## 1-0.9461B+0.8493B^2 0.5570+-0.9312i 0.9216 0.1642
## 1+0.9029B -1.1075 0.9029 0.5000
## 1+1.5832B+0.7800B^2 -1.0148+-0.5021i 0.8832 0.4269
##
##
ASE.short.m2 = mean((df$AVG_EP[264:266]-m2.result$f)^2)
ASE.short.m2 # 6.718437e-05 -> 0.0000672
## [1] 6.718437e-05
m2.result.long = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 36, lastn = TRUE, limits=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6637B+0.9197B^2 0.9044+-0.5189i 0.9590 0.0829
## 1-1.9090B+0.9150B^2 1.0432+-0.0685i 0.9566 0.0104
## 1-0.0231B+0.9079B^2 0.0127+-1.0494i 0.9528 0.2481
## 1+0.9692B+0.9021B^2 -0.5372+-0.9055i 0.9498 0.3352
## 1-0.9461B+0.8493B^2 0.5570+-0.9312i 0.9216 0.1642
## 1+0.9029B -1.1075 0.9029 0.5000
## 1+1.5832B+0.7800B^2 -1.0148+-0.5021i 0.8832 0.4269
##
##
ASE.long.m2 = mean((df$AVG_EP[(266-36+1):266]-m2.result.long$f)^2)
ASE.long.m2 # 0.0001268439
## [1] 0.0001268439
#### Multivariate modeling: VAR on AVG_EP, CPI, GAS_P, TEMP ####
# Train on obs 1:230, hold out obs 231:266 (36 months) for testing.
train.var = df[1:230,2:5]
test.var = df[231:266,2:5]
# Prior to fitting, look at cross-correlation plots to get an idea
ccf(train.var$CPI, train.var$AVG_EP)
# Looks like Gas Price lag 2 is most correlated with electricity price
ccf(train.var$GAS_P, train.var$AVG_EP)
# Lag 0 or 1 of temperature seems most correlated with electricity price
# (although this correlation is weaker compared to Gas and CPI)
ccf(train.var$TEMP, train.var$AVG_EP)
# AIC and FPE selected lag = 11, Schwarz Criterion selected lag = 6, Hannan Quinn Criterion selected lag = 3
# NOTE(review): per the output below, HQ selected 6 and SC selected 3
# (the comment above has them swapped).
# Try them all and compare their short term and long term ASE
VARselect(train.var, lag.max=15, type="both", season = NULL, exogen = NULL)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 11 6 3 11
##
## $criteria
## 1 2 3 4 5
## AIC(n) -1.283887e+01 -1.379340e+01 -1.430460e+01 -1.446431e+01 -1.457456e+01
## HQ(n) -1.268684e+01 -1.354002e+01 -1.394988e+01 -1.400824e+01 -1.401713e+01
## SC(n) -1.246261e+01 -1.316630e+01 -1.342667e+01 -1.333554e+01 -1.319495e+01
## FPE(n) 2.655678e-06 1.022636e-06 6.136368e-07 5.234925e-07 4.694594e-07
## 6 7 8 9 10
## AIC(n) -1.468531e+01 -1.461657e+01 -1.462841e+01 -1.469184e+01 -1.476153e+01
## HQ(n) -1.402654e+01 -1.385645e+01 -1.376693e+01 -1.372901e+01 -1.369735e+01
## SC(n) -1.305486e+01 -1.273529e+01 -1.249629e+01 -1.230887e+01 -1.212773e+01
## FPE(n) 4.210328e-07 4.521535e-07 4.483443e-07 4.226040e-07 3.962645e-07
## 11 12 13 14 15
## AIC(n) -1.484743e+01 -1.479751e+01 -1.484364e+01 -1.478650e+01 -1.472243e+01
## HQ(n) -1.368191e+01 -1.353063e+01 -1.347541e+01 -1.331692e+01 -1.315150e+01
## SC(n) -1.196279e+01 -1.166203e+01 -1.145733e+01 -1.114935e+01 -1.083443e+01
## FPE(n) 3.660212e-07 3.877759e-07 3.737362e-07 4.000256e-07 4.318945e-07
# Try lag = 11 ("both" = include constant and trend)
lsfit.p11 = VAR(train.var, p = 11, type ="both")
preds.p11.short = predict(lsfit.p11, n.ahead = 3)
preds.p11.long = predict(lsfit.p11, n.ahead = 36)
test_ASE_p11_short = mean((preds.p11.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p11_short # 1.40152e-05 -> 0.000014
## [1] 1.40152e-05
test_ASE_p11_long = mean((preds.p11.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p11_long # 0.0001447111
## [1] 0.0001447111
# Try lag = 6
lsfit.p6 = VAR(train.var, p = 6, type ="both")
preds.p6.short = predict(lsfit.p6, n.ahead = 3)
preds.p6.long = predict(lsfit.p6, n.ahead = 36)
test_ASE_p6_short = mean((preds.p6.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p6_short # 9.127952e-06 -> 0.00000913
## [1] 9.127952e-06
test_ASE_p6_long = mean((preds.p6.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p6_long # 0.0001166065
## [1] 0.0001166065
# Try lag = 3 (best ASE of the three, both short and long term)
lsfit.p3 = VAR(train.var, p = 3, type ="both")
preds.p3.short = predict(lsfit.p3, n.ahead = 3)
preds.p3.long = predict(lsfit.p3, n.ahead = 36)
test_ASE_p3_short = mean((preds.p3.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p3_short # 2.780987e-06 -> 0.000002781
## [1] 2.780987e-06
test_ASE_p3_long = mean((preds.p3.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p3_long # 9.241909e-05 -> 0.00009242
## [1] 9.241909e-05
# Short Term Forecast - VAR best model (forecast in red, limits dotted blue)
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast VAR")
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# Long Term Forecast - VAR best model
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast VAR")
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
#### VAR refit on the full series for true future forecasts ####
VARselect(df[,2:5], lag.max = 15, type = "both")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 13 6 3 13
##
## $criteria
## 1 2 3 4 5
## AIC(n) -1.237609e+01 -1.329221e+01 -1.376292e+01 -1.396360e+01 -1.412211e+01
## HQ(n) -1.224044e+01 -1.306612e+01 -1.344639e+01 -1.355664e+01 -1.362471e+01
## SC(n) -1.203900e+01 -1.273038e+01 -1.297636e+01 -1.295232e+01 -1.288609e+01
## FPE(n) 4.218386e-06 1.687877e-06 1.054488e-06 8.632014e-07 7.372743e-07
## 6 7 8 9 10
## AIC(n) -1.421480e+01 -1.414446e+01 -1.418728e+01 -1.428358e+01 -1.437366e+01
## HQ(n) -1.362696e+01 -1.346619e+01 -1.341857e+01 -1.342443e+01 -1.342408e+01
## SC(n) -1.275405e+01 -1.245899e+01 -1.227708e+01 -1.214865e+01 -1.201400e+01
## FPE(n) 6.728004e-07 7.229901e-07 6.941467e-07 6.321121e-07 5.795833e-07
## 11 12 13 14 15
## AIC(n) -1.446661e+01 -1.443772e+01 -1.449996e+01 -1.445400e+01 -1.440935e+01
## HQ(n) -1.342658e+01 -1.330726e+01 -1.327906e+01 -1.314267e+01 -1.300758e+01
## SC(n) -1.188221e+01 -1.162859e+01 -1.146611e+01 -1.119542e+01 -1.092604e+01
## FPE(n) 5.302822e-07 5.484751e-07 5.183337e-07 5.463499e-07 5.757315e-07
# p = 3 performed best on the holdout above, so use it for the future forecasts
var.model = VAR(df[,2:5], p=3,type = "both")
forecast.3month = predict(var.model, n.ahead = 3)
forecast.36month = predict(var.model, n.ahead = 36)
# Plot Short Term future forecast (beyond obs 266)
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast VAR")
points(266:269,c(df$AVG_EP[266:266],forecast.3month$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(266:269,c(df$AVG_EP[266:266],forecast.3month$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(266:269,c(df$AVG_EP[266:266],forecast.3month$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# Plot Long Term future forecast (obs 267:302)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 302),ylim=c(0.05,0.25),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast VAR")
points(266:302,c(df$AVG_EP[266:266],forecast.36month$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(266:302,c(df$AVG_EP[266:266],forecast.36month$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(266:302,c(df$AVG_EP[266:266],forecast.36month$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
Plan: (1) forecast the final 36 values for temperature, gas price, and CPI using univariate MLP models; (2) use these predictions as regressors in a multivariate MLP model to predict the final 36 electricity price values.
#### MLP (neural network) modeling with nnfor ####
# Training series: obs 1:230 (Jan 2000 - Feb 2019); holdout: obs 231:266.
df.train = ts(df[1:230,2], start = c(2000,1), frequency = 12)
# NOTE(review): df.test is defined but never used below (the MLP ASEs index
# df$AVG_EP directly).
df.test = ts(df[231:266,2], start=c(2019,3), frequency = 12)
other.predictors.train = df[1:230,3:5]
set.seed(2)
# Multivariate MLP: 5 hidden nodes, 20 repetitions combined by median,
# lag 1 of each of the three regressors (CPI, GAS_P, TEMP).
fit.mlp1 = mlp(df.train, reps = 20, comb = "median", xreg = other.predictors.train,
xreg.lags = c(1,1,1), allow.det.season = FALSE, hd = 5, difforder = 0, sel.lag = FALSE)
fit.mlp1
## MLP fit with 5 hidden nodes and 20 repetitions.
## Univariate lags: (1,2,3,4,5,6,7,8,9,10,11,12)
## 3 regressors included.
## - Regressor 1 lags: (1)
## - Regressor 2 lags: (1)
## - Regressor 3 lags: (1)
## Forecast combined using the median operator.
## MSE: 0.
plot(fit.mlp1)
# Forecast next 36 CPI values with a univariate MLP
set.seed(2)
fit.mlp.cpi = mlp(ts(df$CPI[1:230], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.cpi = forecast(fit.mlp.cpi, h=36)
plot(fore.mlp.cpi)
# Forecast next 36 Gas_P values with a univariate MLP
set.seed(2)
fit.mlp.gas = mlp(ts(df$GAS_P[1:230], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.gas = forecast(fit.mlp.gas, h=36)
plot(fore.mlp.gas)
# Forecast next 36 Temperature values with a univariate MLP
set.seed(2)
fit.mlp.temp = mlp(ts(df$TEMP[1:230], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.temp = forecast(fit.mlp.temp, h=36)
plot(fore.mlp.temp)
# Build the regressor matrix for forecasting: observed values for the
# training period (1:230) followed by the 36 MLP-forecast values.
# BUG FIX: the third column was mistakenly named GAS_P a second time (R
# silently renames duplicate data.frame columns); it holds temperature and
# must be named TEMP to match the training regressors df[, 3:5].
other.predictors.with.forecast = data.frame(CPI = c(df$CPI[1:230],fore.mlp.cpi$mean[1:36]),
                                            GAS_P = c(df$GAS_P[1:230],fore.mlp.gas$mean[1:36]),
                                            TEMP = c(df$TEMP[1:230],fore.mlp.temp$mean[1:36]))
# Short-term (3 month) holdout forecast; scored on obs 231:233
fore.mlp1.short = forecast(fit.mlp1, h=3, xreg = other.predictors.with.forecast)
test_ASE_mlp1_short = mean((fore.mlp1.short$mean[1:3]-df$AVG_EP[231:233])^2)
test_ASE_mlp1_short # 1.947679e-05 -> 0.00001947
## [1] 1.947679e-05
# Long-term (36 month) holdout forecast; scored on obs 231:266
fore.mlp1.long = forecast(fit.mlp1, h=36, xreg = other.predictors.with.forecast)
test_ASE_mlp1_long = mean((fore.mlp1.long$mean[1:36]-df$AVG_EP[231:266])^2)
test_ASE_mlp1_long # 0.000111
## [1] 0.0001109964
# Short Term Forecast - MLP model (forecast in red over obs 231:233)
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast MLP")
points(230:233,c(df$AVG_EP[230:230],fore.mlp1.short$mean[1:3]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long Term Forecast - MLP model (forecast in red over obs 231:266)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast MLP")
points(230:266,c(df$AVG_EP[230:230],fore.mlp1.long$mean[1:36]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
#### Refit the MLP models on all 266 observations for true future forecasts ####
df.mlp.train = ts(df[1:266,2], start = c(2000,1), frequency = 12)
other.predictors.mlp.train = df[1:266,3:5]
set.seed(2)
# Same architecture as fit.mlp1: 5 hidden nodes, 20 reps, lag-1 regressors
fit.all.mlp = mlp(df.mlp.train, reps = 20, comb = "median", xreg = other.predictors.mlp.train,
xreg.lags = c(1,1,1), allow.det.season = FALSE, hd = 5, difforder = 0, sel.lag = FALSE)
fit.all.mlp
## MLP fit with 5 hidden nodes and 20 repetitions.
## Univariate lags: (1,2,3,4,5,6,7,8,9,10,11,12)
## 3 regressors included.
## - Regressor 1 lags: (1)
## - Regressor 2 lags: (1)
## - Regressor 3 lags: (1)
## Forecast combined using the median operator.
## MSE: 0.
plot(fit.all.mlp)
# Forecast next 36 CPI values beyond the data
set.seed(2)
fit.mlp.all.cpi = mlp(ts(df$CPI[1:266], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.all.cpi = forecast(fit.mlp.all.cpi, h=36)
plot(fore.mlp.all.cpi)
# Forecast next 36 Gas_P values beyond the data
set.seed(2)
fit.mlp.all.gas = mlp(ts(df$GAS_P[1:266], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.all.gas = forecast(fit.mlp.all.gas, h=36)
plot(fore.mlp.all.gas)
# Forecast next 36 Temperature values beyond the data
set.seed(2)
fit.mlp.all.temp = mlp(ts(df$TEMP[1:266], start = c(2000,1), frequency = 12), reps = 20, comb = "median",
allow.det.season = TRUE)
fore.mlp.all.temp = forecast(fit.mlp.all.temp, h=36)
plot(fore.mlp.all.temp)
# Regressor matrix for true future forecasts: all 266 observed values
# followed by the 36 MLP-forecast values for each predictor.
# BUG FIX: the third column was mistakenly duplicated as GAS_P; it holds
# temperature and must be named TEMP to match the training regressors
# df[, 3:5].
other.predictors.with.forecast = data.frame(CPI = c(df$CPI[1:266],fore.mlp.all.cpi$mean[1:36]),
                                            GAS_P = c(df$GAS_P[1:266],fore.mlp.all.gas$mean[1:36]),
                                            TEMP = c(df$TEMP[1:266],fore.mlp.all.temp$mean[1:36]))
# Short Term (3 month) future forecast with MLP
fore.mlp.all.short = forecast(fit.all.mlp, h=3, xreg = other.predictors.with.forecast)
# Long term (36 month) future forecast with MLP
fore.mlp.all.long = forecast(fit.all.mlp, h=36, xreg = other.predictors.with.forecast)
# Plot short-term MLP future forecast (obs 267:269, red)
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast MLP")
points(266:269,c(df$AVG_EP[266:266],fore.mlp.all.short$mean[1:3]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Plot long-term MLP future forecast (obs 267:302, red)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 302),ylim=c(0.05,0.185),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast MLP")
points(266:302,c(df$AVG_EP[266:266],fore.mlp.all.long$mean[1:36]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
#### Ensemble 1: average of ARIMA, VAR(p=3), and MLP forecasts ####
# BUG FIX: pred.short$f was produced with lastn = TRUE, n.ahead = 3 on the
# full series, so it forecasts observations 264:266 -- but the VAR and MLP
# terms forecast 231:233 and the ASE is scored on 231:233. Regenerate the
# ARIMA short-term forecast from the same 1:230 training window so all
# three ensemble components target the same months.
pred.arima.230 = fore.arima.wge(df$AVG_EP[1:230], phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = F, lastn = F)
ensemble.forecast.short = (pred.arima.230$f+preds.p3.short$fcst$AVG_EP[,1]+fore.mlp1.short$mean[1:3])/3
test_ASE_ensemble1.short = mean((ensemble.forecast.short-df$AVG_EP[231:233])^2)
test_ASE_ensemble1.short # previously 3.312542e-06 with the misaligned window -- re-run after fix
# pred.long$f (lastn = TRUE, n.ahead = 36) already forecasts obs 231:266,
# so the long-term components are aligned as-is.
ensemble.forecast.long = (pred.long$f+preds.p3.long$fcst$AVG_EP[,1]+fore.mlp1.long$mean[1:36])/3
test_ASE_ensemble1.long = mean((ensemble.forecast.long-df$AVG_EP[231:266])^2)
test_ASE_ensemble1.long # 7.819804e-05 -> 0.0000782
## [1] 7.819804e-05
# Short Term Forecast - Ensemble model (forecast in red over obs 231:233)
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast Ensemble")
points(230:233,c(df$AVG_EP[230:230],ensemble.forecast.short),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long Term Forecast - Ensemble model (forecast in red over obs 231:266)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast Ensemble")
points(230:266,c(df$AVG_EP[230:230],ensemble.forecast.long),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
#### Ensemble 2: average of ARIMA and MLP forecasts only ####
# BUG FIX: same window mismatch as the three-model ensemble -- pred.short$f
# forecasts obs 264:266 (lastn = TRUE on the full series) while the MLP term
# forecasts 231:233 and the ASE is scored on 231:233. Use an ARIMA forecast
# generated from the 1:230 training window instead.
pred.arima.short.230 = fore.arima.wge(df$AVG_EP[1:230], phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = F, lastn = F)
ensemble.forecast.short = (pred.arima.short.230$f+fore.mlp1.short$mean[1:3])/2
test_ASE_ensemble1.short = mean((ensemble.forecast.short-df$AVG_EP[231:233])^2)
test_ASE_ensemble1.short # previously 1.079313e-05 with the misaligned window -- re-run after fix
# pred.long$f already targets obs 231:266, so no change is needed here.
ensemble.forecast.long = (pred.long$f+fore.mlp1.long$mean[1:36])/2
test_ASE_ensemble1.long = mean((ensemble.forecast.long-df$AVG_EP[231:266])^2)
test_ASE_ensemble1.long # 7.769396e-05 -> 0.00007769
## [1] 7.769396e-05
# Short Term Forecast - two-model Ensemble (forecast in red over obs 231:233)
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast Ensemble")
points(230:233,c(df$AVG_EP[230:230],ensemble.forecast.short),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long Term Forecast - two-model Ensemble (forecast in red over obs 231:266)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast Ensemble")
points(230:266,c(df$AVG_EP[230:230],ensemble.forecast.long),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Ensemble forecasts fit on ALL data (through observation 266): equal-weight
# average of the ARIMA (pred.all.*) and MLP (fore.mlp.all.*) future forecasts.
ensemble.all.short = (pred.all.short$f+fore.mlp.all.short$mean[1:3])/2
ensemble.all.long = (pred.all.long$f+fore.mlp.all.long$mean[1:36])/2
# Short term (3 month) future forecast plot.
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
# FIX: restore the "Price" y-axis label for consistency with the other
# ensemble plots (it was dropped here in the original).
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast Ensemble")
# Anchor the forecast trace at the last observed value so the lines connect.
points(266:269,c(df$AVG_EP[266:266],ensemble.all.short),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long term (36 month) future forecast plot.
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 302),ylim=c(0.05,0.185),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
# FIX: restore the "Price" y-axis label (see note above on the short-term plot).
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast Ensemble")
# FIX: plot the ensemble's long-term forecast; the original plotted only the
# MLP component (fore.mlp.all.long$mean[1:36]) even though the plot is titled
# "Ensemble" and ensemble.all.long was computed above.
points(266:302,c(df$AVG_EP[266:266],ensemble.all.long),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
The final recommendation for short-term (3 month) forecasting is the VAR model with p=3. It had the best short-term test ASE of all models explored. The final recommendation for long-term (36 month) forecasting is an ensemble model that equally combines the ARIMA(7,1,4) s=12 with coefficient of 0.4 and the best MLP model. This ensemble had the best long-term (36 month) test ASE and its future forecasts looked promising.
library(ggplot2)
# Parse the DATE column (read in as character) into Date objects.
df[['DATE']] <- as.Date(df[['DATE']], format='%Y-%m-%d')
# Convert short term prediction to dataframe
# Future (lastn = F) 3-step ARIMA(p,1,q) forecast using previously estimated
# coefficients (model1.param / params.est are defined earlier in the document).
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = T, lastn = F)
dev.off()
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.175),col=1)
axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.2,1.2,1.2),text=c("Time","",""),line=c(1.2,2.1,1.8))
# NOTE(review): f.short.15.5 is created later in this knitted document; this
# section only runs cleanly when the whole document is executed out of order
# or re-run a second time — confirm intended chunk ordering.
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$f),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ul),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ll),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# Future (lastn = F) 36-step forecast with the same model.
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 36, limits = T, lastn = F)
# NOTE(review): pred.short is a FUTURE forecast, but the dates attached here
# are the last three OBSERVED dates (indices 264:266) — verify these should
# not instead be the three months after the final observation.
pred.short.df = data.frame(Date = df$DATE[264:266], prediction = pred.short$f, upper = pred.short$ul, lower = pred.short$ll)
colors <- c("Actual Price" = "#00AFBB", "Predicted Price" = "#FC4E07", "Prediction Limit"= "#E7B800")
# Original TS plot:
ggplot(data = df[1:266,], aes(x=DATE, y=AVG_EP))+
geom_line(color="#00AFBB", size = 1) +
geom_point(color="#00AFBB") +
ggtitle("Electricity Price Over time")+xlab("Date")+ylab("Average Price") + theme_classic()
# Short-term forecast plot: actual prices (teal), point forecasts (orange),
# and upper/lower prediction limits (yellow).
ggplot(data = df[260:266,], aes(x=DATE, y=AVG_EP, color="Actual Price"))+
geom_line( size = 1) +
ggtitle("Short term average Electricity price forecast")+xlab("Date")+ylab("Average Price") +
geom_line(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.1) +
geom_point(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.5)+
geom_line(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
geom_point(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
geom_line(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+
geom_point(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+
scale_color_manual(values = colors) + theme_classic()
# Sample plots (realization, sample autocorrelations, spectral density)
# of the raw electricity price series.
plotts.sample.wge(df$AVG_EP)
## $xbar
## [1] 0.1177444
##
## $autplt
## [1] 1.0000000 0.9602995 0.9151642 0.8723585 0.8295085 0.7865279 0.7574500
## [8] 0.7362720 0.7262652 0.7196580 0.7114933 0.7017810 0.6879721 0.6568276
## [15] 0.6238992 0.5937430 0.5625050 0.5322346 0.5184125 0.5104122 0.5059540
## [22] 0.5026686 0.4994694 0.4952331 0.4869328 0.4606747
##
## $freq
## [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
## [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
## [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
## [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
## [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
## [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
## [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
## [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
## [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
## [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
## [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
## [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
## [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
## [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
## [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
## [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
## [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
##
## $dbz
## [1] 12.5686893 12.3173433 11.8982305 11.3114379 10.5579811 9.6409858
## [7] 8.5676671 7.3524912 6.0218430 4.6198784 3.2132709 1.8890411
## [13] 0.7381312 -0.1737122 -0.8283465 -1.2512906 -1.4891883 -1.5921087
## [19] -1.6091720 -1.5896992 -1.5821599 -1.6303232 -1.7694859 -2.0245855
## [25] -2.4100298 -2.9302313 -3.5799832 -4.3442938 -5.1978261 -6.1046537
## [31] -7.0196173 -7.8927223 -8.6769837 -9.3376650 -9.8587949 -10.2438964
## [37] -10.5116138 -10.6894307 -10.8079797 -10.8966372 -10.9802874 -11.0772077
## [43] -11.1981223 -11.3463448 -11.5187772 -11.7074923 -11.9016724 -12.0897493
## [49] -12.2616000 -12.4105708 -12.5349379 -12.6383063 -12.7285740 -12.8154841
## [55] -12.9072621 -13.0071528 -13.1107990 -13.2054874 -13.2722777 -13.2914789
## [61] -13.2502907 -13.1493354 -13.0044255 -12.8426050 -12.6950565 -12.5905515
## [67] -12.5514804 -12.5924245 -12.7202475 -12.9346975 -13.2288848 -13.5893678
## [73] -13.9958765 -14.4210309 -14.8308363 -15.1871536 -15.4531999 -15.6016961
## [79] -15.6227241 -15.5269821 -15.3423318 -15.1058464 -14.8557019 -14.6258190
## [85] -14.4436045 -14.3297151 -14.2986430 -14.3593301 -14.5154180 -14.7649981
## [91] -15.0998965 -15.5047232 -15.9562371 -16.4240086 -16.8735912 -17.2727527
## [97] -17.5992900 -17.8466594 -18.0238263 -18.1492694 -18.2427639 -18.3189949
## [103] -18.3848575 -18.4401307 -18.4802628 -18.4998799 -18.4957898 -18.4686207
## [109] -18.4228343 -18.3655051 -18.3046120 -18.2475191 -18.1999932 -18.1657677
## [115] -18.1464773 -18.1417432 -18.1492234 -18.1645248 -18.1809693 -18.1893543
## [121] -18.1780207 -18.1336861 -18.0434445 -17.8979003 -17.6946141 -17.4404011
## [127] -17.1512365 -16.8496724 -16.5609037 -16.3089797 -16.1141036 -15.9911686
## [133] -15.9491882
# slowly damping autocorrelations
# spectral density peaks at 0 (wandering behavior)
# possible seasonal peak at 1/12 = .0833333 (monthly)
# est.ar.wge(df$AVG_EP,p=15,type="burg")
# factor.wge(phi=c(rep(0,11),1))
# some evidence of s=12 in overfit, though Abs Recip should be a higher
# take difference and look at acf
# First difference (1-B) of the price series to remove the wandering trend.
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)
dev.off()
## null device
## 1
acf(AVE_EP_d1)
# Sample plots of the differenced series.
plotts.sample.wge(AVE_EP_d1)
## $xbar
## [1] 0.0003509434
##
## $autplt
## [1] 1.000000000 0.143400409 0.007216805 0.004618813 -0.003852025
## [6] -0.190246498 -0.124240595 -0.175915478 -0.025667511 0.058883404
## [11] -0.083321390 0.090451630 0.411172282 0.100208511 -0.065665133
## [16] 0.023298835 -0.063084353 -0.181683095 -0.140869541 -0.222865159
## [21] -0.045233335 -0.018336048 -0.015243214 0.088588981 0.368127820
## [26] 0.038711866
##
## $freq
## [1] 0.003773585 0.007547170 0.011320755 0.015094340 0.018867925 0.022641509
## [7] 0.026415094 0.030188679 0.033962264 0.037735849 0.041509434 0.045283019
## [13] 0.049056604 0.052830189 0.056603774 0.060377358 0.064150943 0.067924528
## [19] 0.071698113 0.075471698 0.079245283 0.083018868 0.086792453 0.090566038
## [25] 0.094339623 0.098113208 0.101886792 0.105660377 0.109433962 0.113207547
## [31] 0.116981132 0.120754717 0.124528302 0.128301887 0.132075472 0.135849057
## [37] 0.139622642 0.143396226 0.147169811 0.150943396 0.154716981 0.158490566
## [43] 0.162264151 0.166037736 0.169811321 0.173584906 0.177358491 0.181132075
## [49] 0.184905660 0.188679245 0.192452830 0.196226415 0.200000000 0.203773585
## [55] 0.207547170 0.211320755 0.215094340 0.218867925 0.222641509 0.226415094
## [61] 0.230188679 0.233962264 0.237735849 0.241509434 0.245283019 0.249056604
## [67] 0.252830189 0.256603774 0.260377358 0.264150943 0.267924528 0.271698113
## [73] 0.275471698 0.279245283 0.283018868 0.286792453 0.290566038 0.294339623
## [79] 0.298113208 0.301886792 0.305660377 0.309433962 0.313207547 0.316981132
## [85] 0.320754717 0.324528302 0.328301887 0.332075472 0.335849057 0.339622642
## [91] 0.343396226 0.347169811 0.350943396 0.354716981 0.358490566 0.362264151
## [97] 0.366037736 0.369811321 0.373584906 0.377358491 0.381132075 0.384905660
## [103] 0.388679245 0.392452830 0.396226415 0.400000000 0.403773585 0.407547170
## [109] 0.411320755 0.415094340 0.418867925 0.422641509 0.426415094 0.430188679
## [115] 0.433962264 0.437735849 0.441509434 0.445283019 0.449056604 0.452830189
## [121] 0.456603774 0.460377358 0.464150943 0.467924528 0.471698113 0.475471698
## [127] 0.479245283 0.483018868 0.486792453 0.490566038 0.494339623 0.498113208
##
## $dbz
## [1] -1.120417257 -1.057321119 -0.967803862 -0.871366652 -0.788722954
## [6] -0.737328900 -0.727027932 -0.755566598 -0.803936769 -0.832929506
## [11] -0.785111771 -0.598357730 -0.231544277 0.311855412 0.982548849
## [16] 1.707474033 2.415388266 3.050760530 3.575864464 3.967301805
## [21] 4.211630518 4.301984419 4.236042901 4.015232297 3.645031667
## [26] 3.136374621 2.508200347 1.791000750 1.030379668 0.287870305
## [31] -0.365621850 -0.866725662 -1.180883021 -1.312713072 -1.296521583
## [36] -1.176282765 -0.990656767 -0.768813704 -0.533255163 -0.303994930
## [41] -0.100901678 0.056300912 0.149446791 0.163653216 0.088751394
## [46] -0.079643613 -0.339188280 -0.679708681 -1.081799949 -1.515313688
## [51] -1.938507184 -2.299460174 -2.541922372 -2.616659878 -2.495489134
## [56] -2.180831391 -1.704558815 -1.117208409 -0.474653659 0.171621987
## [61] 0.779050466 1.314716300 1.754229965 2.080053396 2.279937105
## [66] 2.345730221 2.272647177 2.059027183 1.706645504 1.221699857
## [71] 0.616681931 -0.086592792 -0.852639464 -1.627201727 -2.334032236
## [76] -2.879813628 -3.174836472 -3.166982246 -2.866661700 -2.339837877
## [81] -1.676706490 -0.962703359 -0.265241733 0.367438140 0.902603911
## [86] 1.318781847 1.602349308 1.745160510 1.743061309 1.595140367
## [91] 1.303644764 0.874585701 0.319147750 -0.343956826 -1.085220560
## [96] -1.860869906 -2.611439622 -3.266625145 -3.760214129 -4.051758377
## [101] -4.141254015 -4.064681630 -3.874998452 -3.623685594 -3.351483551
## [106] -3.086961339 -2.848371047 -2.645761586 -2.482366783 -2.355417603
## [111] -2.256930428 -2.175055600 -2.096363575 -2.008973862 -1.905767886
## [116] -1.786446623 -1.657390290 -1.529195151 -1.412773358 -1.315320336
## [121] -1.237242196 -1.170777151 -1.100883924 -1.008811114 -0.878045087
## [126] -0.700876001 -0.482625607 -0.241275313 -0.002837887 0.204940314
## [131] 0.357919662 0.438844743
# characteristic seasonal component for 12 months (ACF large at multiples of 12)
# Apply the seasonal difference (1-B^12) on top of the first difference.
AVE_EP_d1_s12 = artrans.wge(AVE_EP_d1,phi.tr=c(rep(0,11),1))
plotts.sample.wge(AVE_EP_d1_s12)
## $xbar
## [1] 9.881423e-05
##
## $autplt
## [1] 1.000000000 0.061881369 0.149355139 -0.067465091 0.065996069
## [6] 0.005480036 0.014532954 0.081574193 -0.007551414 0.110805568
## [11] -0.133491536 -0.005427407 -0.467952630 0.044216355 -0.078037716
## [16] 0.019932518 -0.072550894 0.058968103 -0.061592872 -0.153638588
## [21] -0.019250759 -0.075084909 0.074778865 -0.011612515 -0.027345280
## [26] -0.052413426
##
## $freq
## [1] 0.003952569 0.007905138 0.011857708 0.015810277 0.019762846 0.023715415
## [7] 0.027667984 0.031620553 0.035573123 0.039525692 0.043478261 0.047430830
## [13] 0.051383399 0.055335968 0.059288538 0.063241107 0.067193676 0.071146245
## [19] 0.075098814 0.079051383 0.083003953 0.086956522 0.090909091 0.094861660
## [25] 0.098814229 0.102766798 0.106719368 0.110671937 0.114624506 0.118577075
## [31] 0.122529644 0.126482213 0.130434783 0.134387352 0.138339921 0.142292490
## [37] 0.146245059 0.150197628 0.154150198 0.158102767 0.162055336 0.166007905
## [43] 0.169960474 0.173913043 0.177865613 0.181818182 0.185770751 0.189723320
## [49] 0.193675889 0.197628458 0.201581028 0.205533597 0.209486166 0.213438735
## [55] 0.217391304 0.221343874 0.225296443 0.229249012 0.233201581 0.237154150
## [61] 0.241106719 0.245059289 0.249011858 0.252964427 0.256916996 0.260869565
## [67] 0.264822134 0.268774704 0.272727273 0.276679842 0.280632411 0.284584980
## [73] 0.288537549 0.292490119 0.296442688 0.300395257 0.304347826 0.308300395
## [79] 0.312252964 0.316205534 0.320158103 0.324110672 0.328063241 0.332015810
## [85] 0.335968379 0.339920949 0.343873518 0.347826087 0.351778656 0.355731225
## [91] 0.359683794 0.363636364 0.367588933 0.371541502 0.375494071 0.379446640
## [97] 0.383399209 0.387351779 0.391304348 0.395256917 0.399209486 0.403162055
## [103] 0.407114625 0.411067194 0.415019763 0.418972332 0.422924901 0.426877470
## [109] 0.430830040 0.434782609 0.438735178 0.442687747 0.446640316 0.450592885
## [115] 0.454545455 0.458498024 0.462450593 0.466403162 0.470355731 0.474308300
## [121] 0.478260870 0.482213439 0.486166008 0.490118577 0.494071146 0.498023715
##
## $dbz
## [1] 0.256286821 0.546568420 0.952837510 1.397969554 1.818517774
## [6] 2.171517399 2.431835518 2.586947169 2.632367320 2.568479159
## [11] 2.398581638 2.127839581 1.762935993 1.312399163 0.787751723
## [16] 0.205768851 -0.407883853 -1.013892123 -1.555395592 -1.960157417
## [21] -2.155909549 -2.098691610 -1.796550260 -1.306688445 -0.708789499
## [26] -0.077950661 0.528174551 1.070090033 1.522194178 1.868123676
## [31] 2.097279981 2.202672411 2.179826470 2.026544105 1.743464190
## [36] 1.335558727 0.814865432 0.204765970 -0.454441082 -1.100884316
## [41] -1.653410996 -2.029142815 -2.175656259 -2.096353000 -1.844515099
## [46] -1.492349427 -1.102864557 -0.718933511 -0.365235922 -0.054692828
## [51] 0.205443392 0.409412480 0.550750500 0.621643480 0.613302099
## [56] 0.516918667 0.324973300 0.032925741 -0.358383402 -0.839054363
## [61] -1.384612777 -1.949022340 -2.459779620 -2.824432126 -2.959127196
## [66] -2.831252082 -2.480174531 -1.993584247 -1.465174899 -0.968259754
## [71] -0.550901214 -0.241471621 -0.055723628 -0.002093647 -0.084856825
## [76] -0.305480057 -0.662376918 -1.148854163 -1.748558382 -2.427506214
## [81] -3.123025247 -3.735547164 -4.140359499 -4.236959997 -4.011963346
## [86] -3.548246410 -2.968531294 -2.377735302 -1.842765671 -1.397726628
## [91] -1.055164236 -0.815182020 -0.671416274 -0.614833756 -0.636258901
## [96] -0.728051343 -0.884911360 -1.103495838 -1.380378027 -1.707835733
## [101] -2.067067325 -2.419280736 -2.698181090 -2.813739011 -2.680178203
## [106] -2.262694762 -1.603612522 -0.798765477 0.049284527 0.860873032
## [111] 1.584517262 2.190512434 2.662960956 2.993768597 3.178835139
## [116] 3.215817299 3.102884820 2.838125422 2.419508272 1.845626764
## [121] 1.117960269 0.246390726 -0.738560101 -1.761896007 -2.670695542
## [126] -3.229542302
dev.off()
## null device
## 1
acf(AVE_EP_d1_s12)
# doesn't look like white noise
# aic5.wge(AVE_EP_d1_s12,p=0:15) #13,0 12,2 12,0
# Fit an AR(12) to the (1-B)(1-B^12)-transformed series (an AIC5 candidate).
EP_est = est.arma.wge(AVE_EP_d1_s12,p=12)
##
##
## Coefficients of AR polynomial:
## 0.0678 0.0697 -0.0351 0.0627 0.0391 0.0130 0.0534 0.0131 0.0759 -0.0687 -0.0137 -0.4792
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8492B+0.9205B^2 -1.0044+-0.2784i 0.9594 0.4570
## 1-1.8551B+0.9104B^2 1.0189+-0.2456i 0.9541 0.0377
## 1-1.3412B+0.9077B^2 0.7388+-0.7456i 0.9527 0.1257
## 1-0.4737B+0.8816B^2 0.2687+-1.0306i 0.9389 0.2094
## 1+1.2920B+0.8465B^2 -0.7631+-0.7739i 0.9201 0.3739
## 1+0.4610B+0.8443B^2 -0.2730+-1.0535i 0.9188 0.2904
##
##
# short 3 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=3,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
# 2.8495e-05 -> 0.0000285
# lastn=TRUE: forecast the final 3 observations for holdout comparison.
f.short = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short$f)^2)
ASE.short #3.595854e-05 -> 0.000036
## [1] 3.595854e-05
# long 36 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
# 0.0004547268
# Holdout ASE over the final 36 observations (indices 231:266).
f.long = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long$f)^2)
ASE.long
## [1] 0.001223659
#0.001223659
# try other AIC5 options with long forecast: ARUMA(13,1,0) s=12
EP_est_13 = est.arma.wge(AVE_EP_d1_s12,p=13)
##
##
## Coefficients of AR polynomial:
## 0.1128 0.0693 -0.0277 0.0568 0.0365 0.0050 0.0535 0.0144 0.0724 -0.0676 -0.0225 -0.4814 0.0981
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8636B+0.9335B^2 -0.9982+-0.2736i 0.9662 0.4574
## 1-1.3203B+0.8982B^2 0.7350+-0.7571i 0.9477 0.1273
## 1-1.8307B+0.8877B^2 1.0311+-0.2516i 0.9422 0.0381
## 1-0.4434B+0.8830B^2 0.2510+-1.0341i 0.9397 0.2121
## 1+1.3242B+0.8664B^2 -0.7642+-0.7551i 0.9308 0.3760
## 1+0.4947B+0.8570B^2 -0.2886+-1.0409i 0.9257 0.2930
## 1-0.2010B 4.9750 0.2010 0.0000
##
##
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_13$phi,thetas=EP_est_13$theta)
# 0.0004697769
f = fore.arima.wge(df$AVG_EP,phi=EP_est_13$phi,theta=EP_est_13$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2)
## [1] 0.001237814
#0.001237814
# try 12,2 ARUMA(12,1,2) s=12
EP_est_12_2 = est.arma.wge(AVE_EP_d1_s12,p=12,q=2)
##
##
## Coefficients of AR polynomial:
## -0.1173 -0.1001 0.0061 0.0723 0.0347 0.0251 0.0581 0.0310 0.0890 -0.0515 -0.0205 -0.5046
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8498B+0.9224B^2 -1.0027+-0.2806i 0.9604 0.4566
## 1-0.4248B+0.9030B^2 0.2353+-1.0257i 0.9502 0.2141
## 1-1.3025B+0.8935B^2 0.7288+-0.7668i 0.9453 0.1290
## 1+0.4967B+0.8923B^2 -0.2783+-1.0214i 0.9446 0.2923
## 1+1.3066B+0.8760B^2 -0.7458+-0.7651i 0.9359 0.3730
## 1-1.8084B+0.8674B^2 1.0424+-0.2573i 0.9313 0.0385
##
##
##
##
## Coefficients of MA polynomial:
## -0.2340 -0.2435
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1+0.2340B+0.2435B^2 -0.4806+-1.9687i 0.4935 0.2881
##
##
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_12_2$phi,thetas=EP_est_12_2$theta)
# 0.0004835913
# 36-month holdout ASE for the ARUMA(12,1,2) s=12 candidate.
f = fore.arima.wge(df$AVG_EP,phi=EP_est_12_2$phi,theta=EP_est_12_2$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2)
## [1] 0.001275155
# 0.00128316
# None of these beat Nick's ARUMA(13,1,5) s=12
# Try another model entirely - take 1st diff then fit an ARMA
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=10:20,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5,type="bic") #12,0 13,1
# ARIMA(15,1,5)
# Fit an ARMA(15,5) to the first-differenced series (the AIC top pick).
EP_est_15_5 = est.arma.wge(AVE_EP_d1,p=15,q=5)
##
##
## Coefficients of AR polynomial:
## -0.1148 0.4747 0.2752 -0.1471 -0.7224 0.0089 -0.0739 0.0415 0.1338 -0.2228 -0.1037 0.2903 0.1106 -0.1338 -0.1832
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7289B+0.9959B^2 0.8680+-0.5007i 0.9980 0.0833
## 1+0.9979B -1.0021 0.9979 0.5000
## 1+0.9941B+0.9922B^2 -0.5009+-0.8700i 0.9961 0.3331
## 1-0.0030B+0.8512B^2 0.0018+-1.0839i 0.9226 0.2497
## 1-1.0089B+0.7780B^2 0.6484+-0.9300i 0.8820 0.1531
## 1-1.6665B+0.7403B^2 1.1255+-0.2897i 0.8604 0.0401
## 1+1.5748B+0.7012B^2 -1.1230+-0.4063i 0.8374 0.4447
## 1+0.9554B+0.5405B^2 -0.8838+-1.0340i 0.7352 0.3626
##
##
##
##
## Coefficients of MA polynomial:
## -0.2191 0.4727 0.4063 -0.2067 -0.8269
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1+0.9732B -1.0275 0.9732 0.5000
## 1-1.6903B+0.9350B^2 0.9039+-0.5025i 0.9670 0.0807
## 1+0.9362B+0.9087B^2 -0.5151+-0.9139i 0.9532 0.3317
##
##
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
# 3.695999e-05
dev.off()
## null device
## 1
# 3-month holdout forecast and ASE for ARIMA(15,1,5).
f.short.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.15.5$f)^2)
# NOTE(review): inline value below (4.216765e-05) disagrees with the recorded
# output (4.567978e-05) — likely a stale comment from an earlier run; confirm.
ASE.short #4.216765e-05 -> 0.000042167
## [1] 4.567978e-05
# 36 month ARIMA(15,1,5)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
# 0.0002368869
# 36-month holdout forecast and ASE for ARIMA(15,1,5).
f.long.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.15.5$f)^2)
ASE.long
## [1] 0.0001089089
# 0.0001089089
# ARIMA(12,1,0)
# Competing simpler model: AR(12) on the first-differenced series.
EP_est_12_0 = est.arma.wge(AVE_EP_d1,p=12)
##
##
## Coefficients of AR polynomial:
## 0.1026 0.0192 -0.0458 0.0443 -0.1428 -0.0133 -0.0959 -0.0076 0.0919 -0.1504 0.0576 0.3936
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6643B+0.9156B^2 0.9089+-0.5159i 0.9568 0.0822
## 1-0.0267B+0.9047B^2 0.0148+-1.0513i 0.9511 0.2478
## 1+0.9674B+0.8990B^2 -0.5381+-0.9071i 0.9481 0.3352
## 1-0.9514B+0.8435B^2 0.5640+-0.9314i 0.9184 0.1633
## 1-0.9027B 1.1078 0.9027 0.0000
## 1+0.8985B -1.1130 0.8985 0.5000
## 1+1.5766B+0.7727B^2 -1.0202+-0.5033i 0.8790 0.4271
##
##
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_12_0$phi)
# 2.661653e-05
f.short.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.12.0$f)^2)
ASE.short # 8.029078e-05 -> 0.0000803
## [1] 8.029078e-05
# 36 month ARIMA(12,1,0)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_12_0$phi)
# 0.0002323361
f.long.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.12.0$f)^2)
ASE.long
## [1] 0.0001502498
# 0.0001502498
plotts.sample.wge(df$AVG_EP)
## over-fit like our lives depend on it
# High-order Burg AR fits to surface near-nonstationary factors.
overfit_ar_30_ricco=est.ar.wge(df$AVG_EP,p=50, type='burg')
# Clearly a 1-B in there. try removing it and over-fitting again.
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))
overfit_ar_30_i1_ricco=est.ar.wge(AVG_E_i1_ricco,p=20, type='burg')
# nonstationary_lambda = mult.wge(fac1=c(1-1.7276B+0.9942B^2), c(1+0.9951B), c(1+0.0130B+0.9896B^2), c(1+0.9916B+0.9889B^2))
# Multiply the near-nonstationary factors from the overfit factor table into a
# single lambda polynomial.
nonstationary_lambda_ricco = mult.wge(c(1.7276,-0.9942), c(-0.9951), c(-0.0130,-0.9896), c(-0.9916,-0.9889))
# FIX: the object created above is nonstationary_lambda_ricco; the original
# referenced an undefined `nonstationary_lambda` here and in the two uses
# below, which errors (or silently picks up a stale workspace object).
nonstationary_lambda_ricco$model.coef
# Transform the series by the lambda polynomial and pick a (p, q) via BIC.
AVG_E_lambda_corrected_ricco = artrans.wge(df$AVG_EP, c(nonstationary_lambda_ricco$model.coef))
aic_lambda_corrected_ricco = aic.wge(AVG_E_lambda_corrected_ricco, p=0:12, q=0:5, type="bic")
forecast_arma = fore.aruma.wge(df$AVG_EP, phi=aic_lambda_corrected_ricco$phi, theta=aic_lambda_corrected_ricco$theta, d=1, n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE, lambda =c(nonstationary_lambda_ricco$model.coef))
forecast_arma_ricco = fore.aruma.wge(df$AVG_EP, phi=overfit_ar_30_i1_ricco$phi,n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE)
## ^^^ YUCK! that looks terrible. let's try a different approach.
# Grid-search the seasonal order s and differencing order d that minimize the
# 36-month rolling-window RMSE (quadratic runtime in the series length per
# candidate). Ties keep the earliest candidate because the comparison is strict.
best_s_ricco = 0
best_d_ricco = 0
max_s_ricco = 10
max_d_ricco = 2
best_rsme_seasonality_search_and_wandering_search = 10000.0
for (s_cand in 0:max_s_ricco) {
  for (d_cand in 0:max_d_ricco) {
    rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, d = d_cand, s = s_cand, horizon = 36, plot = FALSE)
    cand_rmse = rolling.res.long_ricco$rwRMSE
    if (cand_rmse < best_rsme_seasonality_search_and_wandering_search) {
      # New best candidate: record its RMSE and the (s, d) pair.
      best_rsme_seasonality_search_and_wandering_search = cand_rmse
      best_s_ricco = s_cand
      best_d_ricco = d_cand
    }
  }
}
print(paste("best long rolling window s was:", best_s_ricco))
print(paste("best long rolling window d was:", best_d_ricco))
# best s=0 and d=1
# Difference the series once (best (s, d) found above was s=0, d=1).
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))
# now find best p and q with a rolling window wide grid search
best_p_wide_ricco = 0
best_q_wide_ricco = 0
best_cost_wide_ricco = 10000
best_rsme_s0_d1_p_search_wide = 10000.0
best_last36_ase_wide = 10000.0
q_seq_wide_ricco = seq(1, 11, 5)
p_seq_wide_ricco = seq(0, 30, 10)
for(q_try in q_seq_wide_ricco) {
  for (p_try in p_seq_wide_ricco) {
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      # FIX: nullfile() is portable; the original used file='NUL', which only
      # exists on Windows while the script's setwd targets macOS.
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file=nullfile())
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, horizon = 36, plot=FALSE)
      # FIX: pass the estimated theta too; the original forecast only with phi,
      # silently dropping the MA part of the candidate model being scored.
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      # FIX: score the forecast just computed (f_ricco); the original read a
      # stale object `f` left over from an earlier section of the script.
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266]-f_ricco$f)^2)
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if(cost_ricco < best_cost_wide_ricco) {
        best_cost_wide_ricco = cost_ricco
        best_rsme_s0_d1_p_search_wide = rolling.res.long_ricco$rwRMSE
        best_last36_ase_wide = prediction_ase_ricco
        best_p_wide_ricco = p_try
        best_q_wide_ricco = q_try
      }
    },
    error = function(e) {
      # Log and continue with the next (p, q). NOTE: the original also set a
      # handler-local `was_error` and called `next` after the tryCatch; the
      # assignment never reached the loop's scope and the trailing `next` was
      # a no-op, so both were removed.
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
    })
  }
}
print(paste("best long + immediate rolling window p with wide search was:", best_p_wide_ricco))
print(paste("best long + immediate rolling window q with wide search was:", best_q_wide_ricco))
# now find best p and q with a rolling window narrow grid search
# (refine around the wide-search winner)
best_p_narrow_ricco = 0
best_q_narrow_ricco = 0
best_cost_narrow_ricco = 10000
best_rsme_s0_d1_p_search_narrow = 10000.0
best_last36_ase_narrow = 10000.0
# Search a small neighborhood around the wide-search (p, q), floored at 1.
min_q_narrow_ricco = max(c(1, best_q_wide_ricco-2))
min_p_narrow_ricco = max(c(1, best_p_wide_ricco-4))
q_seq_narrow_ricco = seq(min_q_narrow_ricco, best_q_wide_ricco+1, 1)
p_seq_narrow_ricco = seq(min_p_narrow_ricco, best_p_wide_ricco+2, 1)
best_arma_estimates = NULL
for(q_try in q_seq_narrow_ricco) {
  for (p_try in p_seq_narrow_ricco) {
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      # FIX: nullfile() is portable; the original's file='NUL' only exists on
      # Windows while the script's setwd targets macOS.
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file=nullfile())
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, horizon = 36, plot=FALSE)
      # FIX: include the estimated theta (the original forecast with phi only)
      # and score the freshly computed forecast f_ricco — the original read a
      # stale `f` from an earlier section.
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266]-f_ricco$f)^2)
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if(cost_ricco < best_cost_narrow_ricco) {
        best_cost_narrow_ricco = cost_ricco
        best_rsme_s0_d1_p_search_narrow = rolling.res.long_ricco$rwRMSE
        best_last36_ase_narrow = prediction_ase_ricco
        best_p_narrow_ricco = p_try
        best_q_narrow_ricco = q_try
        best_arma_estimates = est_p_q_search_ricco
      }
    },
    error = function(e) {
      # Log and continue; the original's handler-local `was_error` assignment
      # and the no-op `next` after the tryCatch were removed (see wide search).
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
    })
  }
}
print(paste("best long + immediate rolling window p with narrow search was:", best_p_narrow_ricco))
print(paste("best long + immediate rolling window q with narrow search was:", best_q_narrow_ricco))
best_rsme_s0_d1_p_search_narrow^2
best_last36_ase_narrow
length(df$AVG_EP)
# best P was p=30 q=11, WOW! A huge and complex model BUT it was rolling window RSME verified!
# I wonder how the ARIMA(30,1,11) would do on future, unseen data. it’s probably just REALLY good at fitting the signal we have, even if we rolling window verify. true cross-validation would be better, if we had the data.
# FIX: report the narrow-search winners; the original referenced undefined
# objects (best_rsme_s0_d1_p_search, best_p_ricco, best_q_ricco).
print(paste("best long rolling (36) ase was:", best_rsme_s0_d1_p_search_narrow^2, "for p:", best_p_narrow_ricco, "for q:", best_q_narrow_ricco, "and d = 1"))
# last 3 ASE
f_ricco_3 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=3, lastn=TRUE)
# FIX: the last 3 observations are indices 264:266; the original's
# (266-3):266 selected 4 values and silently recycled the 3 forecasts.
prediction_ase_ricco_3 = mean((df$AVG_EP[(266-2):266]-f_ricco_3$f)^2)
print(paste("Last 3 ASE was s:", prediction_ase_ricco_3, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))
# last 36 ASE
f_ricco_36 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=36, lastn=TRUE)
prediction_ase_ricco_36 = mean((df$AVG_EP[(266-35):266]-f_ricco_36$f)^2)
print(paste("Last 36 ASE was s:", prediction_ase_ricco_36, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))
rolling.res.short_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=best_arma_estimates$phi, theta=best_arma_estimates$theta, d=1, s=0, horizon = 3, plot=FALSE)
# Rolling-window RMSE squared (comparable to the ASEs above).
rolling.res.short_ricco$rwRMSE^2
best_arma_estimates_SSE = sum(best_arma_estimates$res^2)
# AIC = n*log(SSE/n) + 2*(p+q+1).
# FIX: use the selected orders rather than hard-coded 30 and 11 so the
# statistic stays correct if the search picks a different model.
aic = 266*log(best_arma_estimates_SSE/266) +2*(best_p_narrow_ricco+best_q_narrow_ricco+1)
aic
# residuals look VERY white, maybe a slight amount of heteroskedasticity and trend at the very end?
# FIX: diagnose the residuals of the BEST model's forecast object
# (f_ricco_36); the original used `f_ricco`, which holds whatever (p, q) the
# grid search happened to try last.
plotts.sample.wge(f_ricco_36$resid)
# interestingly, ljung box test cannot reject non-stationarity in the residuals! This seems weird.
ljung.wge(f_ricco_36$resid)
best_arma_estimates
# Note: the helper below uses fore.aruma.wge from the CRAN release of tswge
# (and does not print); to use the latest tswge, replace fore.aruma.wge with
# fore.arima.wge.
# Rolling Window RMSE Function
# Rolling-window RMSE for an ARUMA model.
#
# Arguments:
#   series  - numeric vector containing the realization
#   horizon - how far to forecast into the future from each window
#   d       - order of the differencing factor: (1 - B)^d
#   s       - order of the seasonal factor: (1 - B^s)
#   phi     - coefficients of the stationary AR component
#   theta   - coefficients of the invertible MA component
#   plot    - passed through to fore.aruma.wge in the seasonal/differenced case
#
# Slides a fixed-size training window across `series`, forecasts `horizon`
# steps ahead from each window, and records each window's forecast RMSE.
# Returns (invisibly) a list with the mean RMSE over all windows (rwRMSE),
# the settings used, and the vector of per-window RMSEs.
roll.win.rmse.wge_ricco = function(series, horizon = 2, s = 0, d = 0, phi = 0, theta = 0, plot=FALSE)
{
# DEFINE fore.arma.wge2: a stripped-down copy of tswge's fore.arma.wge with
# plotting disabled and one addition -- the optional xbar2 argument, which
# lets the caller fix the process mean (e.g. the mean of the FULL series)
# instead of using the mean of the current window.
fore.arma.wge2=function(x,phi=0,theta=0,n.ahead=5,lastn=FALSE, plot=FALSE,alpha=.05,limits=TRUE, xbar2 = NULL)
{
# lastn=TRUE indicates that the last n.ahead data values are to be forecast
# lastn=FALSE (default) indicates we want forecasts for n.ahead values beyond the end of the realization
n=length(x)
p=length(phi)
# phi = 0 (all zeros) is the convention for "no AR component"
if(sum(phi^2)==0) {p=0}
q=length(theta)
# theta = 0 (all zeros) is the convention for "no MA component"
if(sum(theta^2)==0) {q=0}
#resid=rep(0,n)
npn.ahead=n+n.ahead
xhat=rep(0,npn.ahead)
# Use the supplied fixed mean if given; otherwise the window's sample mean
if(is.null(xbar2))
{
xbar=mean(x)
}
else
{
xbar = xbar2
}
# const = 1 - sum(phi); xbar*const is the intercept of the AR recursion below
const=1
if (p > 0) {for(jp in 1:p) {const=const-phi[jp]}}
#
#
# Calculate Box-Jenkins Forecasts
#
#
# Residuals are obtained by backcasting (50 backcast steps)
#
resid=backcast.wge(x,phi,theta,n.back=50)
#
#
# (Old in-line residual computation, superseded by backcast.wge above)
#maconst=const*xbar
#p1=max(p+1,q+1)
#for (i in p1:n) {resid[i]=x[i]
# if ( p > 0) {for (jp in 1:p) {resid[i]=resid[i]-phi[jp]*x[i-jp]}}
# if (q > 0) {for (jq in 1:q) {resid[i]=resid[i]+theta[jq]*resid[i-jq]}}
# resid[i]=resid[i]-maconst}
#
# Calculating Forecasts
#
#
npn.ahead=n+n.ahead
xhat=rep(0,npn.ahead)
# mm is the forecast origin: end of series, or n - n.ahead when holding out
mm=n
#
#lastn = TRUE
#
if(lastn==TRUE) {mm=n-n.ahead}
#
# Seed xhat with the observed values up to the forecast origin, then apply
# the ARMA difference-equation recursion: AR terms use prior xhat values,
# MA terms use the backcast residuals (only for the first q steps).
for (i in 1:mm) {xhat[i]=x[i]}
for (h in 1:n.ahead) {
if (p > 0) {for (jp in 1:p) {xhat[mm+h]=xhat[mm+h]+phi[jp]*xhat[mm+h-jp]}}
if ((h<=q)&(h>0)) {for(jq in h:q) {xhat[mm+h]=xhat[mm+h]-theta[jq]*resid[mm+h-jq]}}
xhat[mm+h]=xhat[mm+h]+xbar*const}
#
#
# Calculate psi weights for forecast limits
#
#
xi=psi.weights.wge(phi,theta,lag.max=n.ahead)
#
#
#
# Set up forecast/limit vectors (fplot etc. include the origin point x[mm])
nap1=n.ahead+1
fplot=rep(0,nap1)
maxh=mm+n.ahead
llplot=rep(0,nap1)
ulplot=rep(0,nap1)
f=rep(0,nap1)
ll=rep(0,nap1)
ul=rep(0,nap1)
wnv=0
xisq=rep(0,n.ahead)
se=rep(0,n.ahead)
se0=1
# White-noise variance estimate from the backcast residuals
for (i in 1:n) {wnv=wnv+resid[i]**2}
wnv=wnv/n
# se[h] = sqrt(wnv * (1 + psi_1^2 + ... + psi_{h-1}^2)): h-step forecast SE
xisq[1]=1
for (i in 2:n.ahead) {xisq[i]=xisq[i-1]+xi[i-1]^2}
for (i in 1:n.ahead) {se[i]=sqrt(wnv*xisq[i])}
fplot[1]=x[mm]
for (i in 1:n.ahead) {fplot[i+1]=xhat[mm+i]}
# (1 - alpha) forecast limits; -qnorm(alpha/2) is the positive z critical value
ulplot[1]=x[mm]
#for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]+1.96*se[i]}
for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]-qnorm(alpha/2)*se[i]}
llplot[1]=x[mm]
#for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]-1.96*se[i]}
for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]+qnorm(alpha/2)*se[i]}
#
# Plot axis ranges (note: shadows base min/max within this function; the
# plotting code that used them is commented out below)
if(limits==FALSE) {
if(lastn==TRUE) {max=max(x,xhat[1:n])
min=min(x,xhat[1:n])}
else {max=max(x,xhat)
min=min(x,xhat)}}
if(limits==TRUE) {min=min(x,llplot)
max=max(x,ulplot)}
#numrows <- 1
#numcols <- 1
timelab <- 'Time'
valuelab <- ''
#fig.width <- 5
#fig.height <- 2.5
cex.labs <- c(.8,.7,.8)
#par(mfrow=c(numrows,numcols),mar=c(6,2,3,1))
t<-1:n;
np1=n+1
np.ahead=mm+n.ahead
tf<-mm:np.ahead
# Plotting intentionally disabled in this copy (kept for reference)
#if (plot=='TRUE') {
#fig.width <- 5
#fig.height <- 2.5
#cex.labs <- c(1.2,1.2,1.2)
#par(mfrow=c(numrows,numcols),mar=c(9,4,3,2))
#plot(t,x,type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1,maxh),ylim=c(min,max),col=1)
#axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3);
#axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
#abline=mean(x)
#mtext(side=c(1,2,1),cex=cex.labs,text=c(timelab,valuelab,""),line=c(1.2,2.1,1.8))
#points(tf,fplot,type='o',lty=1,cex=.6,lwd=1,pch=1,col=2);
#if(limits=='TRUE') {points(tf,ulplot,type='l',lty=2,cex=0.6,lwd=.75,pch=1,col=4)
#points(tf,llplot,type='l',lty=3,cex=0.6,lwd=.75,pch=1,col=4)
# }
#}
np1=n+1
nap1=n.ahead+1
# Drop the origin point: f holds the n.ahead forecasts
f=fplot[2:nap1]
# Calculate RMSE and MAD against the held-out values (lastn = TRUE only);
# computed but not returned -- the caller recomputes RMSE itself
if(lastn==TRUE){
t.start=n-n.ahead
sum.rmse=0
sum.mad=0
for(i in 1:n.ahead) {sum.rmse=sum.rmse+(f[i]-x[t.start+i])^2
sum.mad=sum.mad+abs(f[i]-x[t.start+i])}
mse=sum.rmse/n.ahead
rmse=sqrt(mse)
mad=sum.mad/n.ahead
}
ll=llplot[2:nap1]
ul=ulplot[2:nap1]
# Only the forecasts are returned in either mode
if(lastn==TRUE){out1=list(f=f)}
if(lastn==FALSE){out1=list(f=f)}
return(out1)
}
numwindows = 0
RMSEHolder = numeric()
# Pure ARMA case (no differencing, no seasonality): use the local
# fore.arma.wge2 so each window is forecast around the full-series mean
if(s == 0 & d == 0)
{
trainingSize = max(length(phi),length(theta)) + 1 # The plus 1 is for the backcast residuals which helps with ARMA model with q > 0
numwindows = length(series)-(trainingSize + horizon) + 1
RMSEHolder = numeric(numwindows)
# print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
for( i in 1:numwindows)
{
# NOTE(review): 'xbar =' relies on R's partial argument matching to reach
# the 'xbar2' parameter of fore.arma.wge2 -- it works, but is fragile.
forecasts <- fore.arma.wge2(series[i:(i+(trainingSize-1))], plot = TRUE, phi = phi, theta = theta,n.ahead = horizon, xbar = mean(series))
RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
RMSEHolder[i] = RMSE
}
}
else
{
# Seasonal/differenced case: delegate to tswge's fore.aruma.wge
trainingSize = sum(length(phi),length(theta),s, d) + 1 # sum and plus one is to help backcast.wge, lag.max and ylim plotting issue in fore.arima.wge
numwindows = length(series)-(trainingSize + horizon) + 1
RMSEHolder = numeric(numwindows)
# print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
for( i in 1:numwindows)
{
#invisible(capture.output(forecasts <- fore.arima.wge(series[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = s, d = d,n.ahead = horizon)))
forecasts <- fore.aruma.wge(series[i:(i+(trainingSize-1))],phi = phi, s = s, d = d, theta = theta,n.ahead = horizon, plot=plot)
RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
RMSEHolder[i] = RMSE
}
}
RMSEHolder
#hist(RMSEHolder, main = "RMSEs for Individual Windows")
WindowedRMSE = mean(RMSEHolder)
# print("The Summary Statistics for the Rolling Window RMSE Are:")
# print(summary(RMSEHolder))
# print(paste("The Rolling Window RMSE is: ",round(WindowedRMSE,3)))
# Return everything invisibly so interactive calls don't spam the console
invisible(list(rwRMSE = WindowedRMSE, trainingSize = trainingSize, numwindows = numwindows, horizon = horizon, s = s, d = d, phi = phi, theta = theta, RMSEs = RMSEHolder))
}